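# Takes the cleaned documents and their categories (both index-able arrays), a model selector (model_num) and a cross-validation fold index (fold_num). Trains the selected model on that fold and returns the fitted model, loss, accuracy, fpr, tpr, AUC, test predictions, the train/test docs and labels (needed for later gradient calcs), the encoded train/test docs, the validation/training loss histories, and the confusion-matrix counts (tn, fp, fn, tp).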
def CNN_call(clean_docs, categories, model_num, fold_num):
    """Train and evaluate one model on one fold of a stratified 10-fold split.

    The documents are split with ``StratifiedKFold(n_splits=10)`` (no
    shuffling, so the partition is the same on every call) and fold
    ``fold_num`` is used as the held-out test set.

    Args:
        clean_docs: Array of cleaned document strings. Must support indexing
            with an integer index array (e.g. a NumPy array).
        categories: Array of labels aligned with ``clean_docs``; same indexing
            requirement. The branches below assume a binary 0/1 target
            (sigmoid + binary cross-entropy, 2x2 confusion matrix).
        model_num: Selects the model. Branches in this part of the function:
            ``9`` -> scikit-learn logistic regression on TF-IDF features;
            ``0`` -> Keras dense network (one 50-unit hidden layer) on TF-IDF
            features, trained with a 10% validation split.
        fold_num: Index of the fold to hold out (indexes the list of 10
            splits, so an out-of-range value raises ``IndexError``).

    Returns:
        For the branches below, a 19-tuple::

            (model, loss, acc, fpr, tpr, roc_auc, test_preds,
             train_doc, test_doc, train_val, test_val,
             train_encoded_docs, test_encoded_docs,
             val_loss, train_loss, tn, fp, fn, tp)

        For ``model_num == 9`` the three loss entries are the placeholder
        ``-100`` (no loss is computed) and ``test_preds`` holds hard class
        labels. For ``model_num == 0`` ``val_loss``/``train_loss`` are the
        per-epoch histories and ``test_preds`` holds sigmoid probabilities.
    """
    from sklearn.model_selection import StratifiedKFold
    # Not used by the branches below; kept so that any code further down this
    # function that relies on ``np`` keeps working.
    import numpy as np

    folder = StratifiedKFold(n_splits=10)
    # One tuple of train-index arrays and one of test-index arrays, per fold.
    train_sets, test_sets = zip(*folder.split(clean_docs, categories))

    test_val = categories[test_sets[fold_num]]
    train_val = categories[train_sets[fold_num]]

    test_doc = clean_docs[test_sets[fold_num]]
    train_doc = clean_docs[train_sets[fold_num]]

    if model_num == 9:  # logistic regression on TF-IDF features
        from keras.preprocessing.text import Tokenizer
        from sklearn.linear_model import LogisticRegression
        import sklearn.metrics as metrics

        # Vocabulary is fitted on the training fold only, so nothing from the
        # test fold leaks into the features.
        tok = Tokenizer()
        tok.fit_on_texts(train_doc)
        train_encoded_docs = tok.texts_to_matrix(train_doc, mode='tfidf')
        test_encoded_docs = tok.texts_to_matrix(test_doc, mode='tfidf')

        model = LogisticRegression()
        model.fit(train_encoded_docs, train_val)

        test_preds = model.predict(test_encoded_docs).ravel()
        acc = model.score(test_encoded_docs, test_val)

        # NOTE(review): test_preds are hard class labels here, so this ROC has
        # a single operating point. Scoring with predict_proba would give a
        # full curve but would change the reported AUC, so it is left as-is.
        fpr, tpr, _thresholds = metrics.roc_curve(test_val, test_preds)
        roc_auc = metrics.auc(fpr, tpr)

        # No loss is computed for this model; -100 is the placeholder value.
        loss = -100
        val_loss = -100
        train_loss = -100

        tn, fp, fn, tp = metrics.confusion_matrix(test_val, test_preds).ravel()
        return (model, loss, acc, fpr, tpr, roc_auc, test_preds,
                train_doc, test_doc, train_val, test_val,
                train_encoded_docs, test_encoded_docs,
                val_loss, train_loss, tn, fp, fn, tp)

    if model_num == 0:  # BOW tfidf with 1 hidden layer and .1 validation
        from keras.preprocessing.text import Tokenizer
        from keras.models import Sequential
        from keras.layers import Dense
        from keras.callbacks import ModelCheckpoint
        import sklearn.metrics as metrics

        # Vocabulary is fitted on the training fold only.
        tok = Tokenizer()
        tok.fit_on_texts(train_doc)
        train_encoded_docs = tok.texts_to_matrix(train_doc, mode='tfidf')
        test_encoded_docs = tok.texts_to_matrix(test_doc, mode='tfidf')

        # Input dimension of the network = width of the TF-IDF matrix.
        num_words = train_encoded_docs.shape[1]

        model = Sequential()
        model.add(Dense(50, input_shape=(num_words,), activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        # The metric is named 'acc' (not 'accuracy') so that the validation
        # metric is reported as 'val_acc' on both old and new Keras releases;
        # with 'accuracy', newer releases report 'val_accuracy' and the
        # checkpoint below would never save.
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

        # Keep only the weights of the epoch with the best validation accuracy.
        filepath = "weights.best.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                                     save_best_only=True, mode='max')
        history = model.fit(train_encoded_docs, train_val, epochs=25, verbose=2,
                            validation_split=.1, callbacks=[checkpoint])
        # Evaluate with the best checkpoint rather than the last epoch.
        model.load_weights(filepath)
        val_loss = history.history['val_loss']
        train_loss = history.history['loss']

        loss, acc = model.evaluate(test_encoded_docs, test_val, verbose=2)

        # Sigmoid outputs flattened from (n, 1) to (n,).
        test_preds = model.predict(test_encoded_docs).ravel()

        fpr, tpr, _thresholds = metrics.roc_curve(test_val, test_preds)
        roc_auc = metrics.auc(fpr, tpr)

        # confusion_matrix needs discrete labels; passing the raw
        # probabilities raises ValueError, so threshold at 0.5 first.
        hard_preds = (test_preds > 0.5).astype(int)
        tn, fp, fn, tp = metrics.confusion_matrix(test_val, hard_preds).ravel()

        return (model, loss, acc, fpr, tpr, roc_auc, test_preds,
                train_doc, test_doc, train_val, test_val,
                train_encoded_docs, test_encoded_docs,
                val_loss, train_loss, tn, fp, fn, tp)
    